1 Overview of the datasets

1.1 Loading the LAT and MX2 data

pp.mset <- phenomis::reading(ProMetIS::post_processed_dir.c(),
                             report.c = "none")[, ProMetIS::sets.vc()]

1.2 Number of samples and features for each dataset

1.2.1 Post-processed

The number of sample and (annotated) features after the 2_post_preprocessed step are as follows:

pp_exprs_mn.ls <- MultiDataSet::as.list(pp.mset)
pp_dims.mn <- t(sapply(pp_exprs_mn.ls, dim))

pp_dims.mn <- cbind(pp_dims.mn,
                    Annotated = sapply(names(pp.mset),
                                       function(set.c) {
                                         fdata.df <- Biobase::fData(pp.mset[[set.c]])
                                         if (grepl("(proteo|preclin)", set.c)) {
                                           return(nrow(fdata.df))
                                         } else {
                                           return(sum(fdata.df[, "name"] != ""))
                                         }
                                       })[rownames(pp_dims.mn)])

pp_dims.mn <- pp_dims.mn[c("preclinical",
                           "proteomics_liver",
                           "metabolomics_liver_c18hypersil_pos",
                           "metabolomics_liver_hilic_neg",
                           "proteomics_plasma",
                           "metabolomics_plasma_c18hypersil_pos",
                           "metabolomics_plasma_hilic_neg",
                           "metabolomics_plasma_c18acquity_pos",
                           "metabolomics_plasma_c18acquity_neg"), ]

rownames(pp_dims.mn) <- c("preclinical",
                          "liver_proteomics",
                          "liver_metabo_c18hypersil_pos",
                          "liver_metabo_hilic_neg",
                          "plasma_proteomics",
                          "plasma_metabo_c18hypersil_pos",
                          "plasma_metabo_hilic_neg",
                          "plasma_metabo_c18acquity_pos",
                          "plasma_metabo_c18acquity_neg")

colnames(pp_dims.mn)[1:2] <- c("Features", "Samples")
pp_dims.mn <- pp_dims.mn[, c("Samples", "Features", "Annotated")]

knitr::kable(pp_dims.mn)
Samples Features Annotated
preclinical 42 200 200
liver_proteomics 42 2187 2187
liver_metabo_c18hypersil_pos 42 5665 138
liver_metabo_hilic_neg 42 2866 199
plasma_proteomics 36 446 446
plasma_metabo_c18hypersil_pos 42 4788 113
plasma_metabo_hilic_neg 42 3131 191
plasma_metabo_c18acquity_pos 42 6104 78
plasma_metabo_c18acquity_neg 42 1584 49
pp_dims.ggplot <- phenomis::gg_barplot(pp_dims.mn,
                                       bar_just.n = -0.2,
                                       cex_bar.i = 5,
                                       cex_axis.i = 14,
                                       palette.vc = c(Samples = RColorBrewer::brewer.pal(3, "Set1")[2],
                                                      Features = RColorBrewer::brewer.pal(3, "Set1")[1],
                                                      Annotated = RColorBrewer::brewer.pal(3, "Set1")[3]),
                                       figure.c = "none")
pp_dims.ggplot <- pp_dims.ggplot + ggplot2::scale_y_continuous(trans = "log10", limits = c(1, NA),
                                                               expand = ggplot2::expansion(add = c(0, 1)))
print(pp_dims.ggplot)

ggplot2::ggsave("figures/article_data/sets_dims_postprocessed.pdf", pp_dims.ggplot)

1.2.2 Aggregated

If the datasets are loaded directly from the 5_aggregated step, the number of features will slightly change because this time one specific genotype is selected (LAT and WT, or MX2 and WT; instead of the full LAT and MX2 and WT initial dataset) and the quality control filter will be reapplied to the selected subsets:

  • proportion of NA < 20% in at least one class

  • variance > 0 in all classes)

  • and for the proteomics datasets: imputation of less than 20% of the samples in at least one class

aggreg_mset.ls <- lapply(ProMetIS::genes.vc(),
                         function(gene.c) {
                           gene.mset <- phenomis::reading(file.path(ProMetIS::aggregated_dir.c(gene.c)),
                                                          report.c = "none")
                           gene.mset <- gene.mset[, ProMetIS::sets.vc()]
                         })
names(aggreg_mset.ls) <- ProMetIS::genes.vc()
aggreg_dims.mn <- NULL
for (gene.c in ProMetIS::genes.vc()) {
  aggreg_gene.mset <- aggreg_mset.ls[[gene.c]]
  aggreg_gene_dims.mn <- t(sapply(names(aggreg_gene.mset),
                                  function(set.c) {
                                    eset <- aggreg_gene.mset[[set.c]]
                                    rev(dim(eset))
                                  }))
  colnames(aggreg_gene_dims.mn) <- paste0(gene.c, "_", colnames(aggreg_gene_dims.mn))
  aggreg_dims.mn <- cbind(aggreg_dims.mn, aggreg_gene_dims.mn)
}

aggreg_dims.mn <- aggreg_dims.mn[c("preclinical",
                                   "proteomics_liver",
                                   "metabolomics_liver_c18hypersil_pos",
                                   "metabolomics_liver_hilic_neg",
                                   "proteomics_plasma",
                                   "metabolomics_plasma_c18hypersil_pos",
                                   "metabolomics_plasma_hilic_neg",
                                   "metabolomics_plasma_c18acquity_pos",
                                   "metabolomics_plasma_c18acquity_neg"), ]

rownames(aggreg_dims.mn) <- c("preclinical",
                              "liver_proteomics",
                              "liver_metabo_c18hypersil_pos",
                              "liver_metabo_hilic_neg",
                              "plasma_proteomics",
                              "plasma_metabo_c18hypersil_pos",
                              "plasma_metabo_hilic_neg",
                              "plasma_metabo_c18acquity_pos",
                              "plasma_metabo_c18acquity_neg")

aggreg_dims.ggplot <- phenomis::gg_barplot(aggreg_dims.mn,
                                           bar_just.n = 0,
                                           cex_bar.i = 5,
                                           cex_axis.i = 14,
                                           palette.vc = c(LAT_Samples = as.character(ProMetIS::palette.vc(light.l = TRUE)["LAT"]),
                                                          LAT_Features = as.character(ProMetIS::palette.vc()["LAT"]),
                                                          MX2_Samples = as.character(ProMetIS::palette.vc(light.l = TRUE)["MX2"]),
                                                          MX2_Features = as.character(ProMetIS::palette.vc()["MX2"])),
                                           figure.c = "none")
aggreg_dims.ggplot <- aggreg_dims.ggplot + ggplot2::scale_y_continuous(trans = "log10", limits = c(1, NA),
                                                                       expand = ggplot2::expansion(add = c(0, 1)))
print(aggreg_dims.ggplot)

ggplot2::ggsave("figures/article_data/sets_dims_aggregated.pdf", aggreg_dims.ggplot)

2 Preclinical features

ppclin.eset <- pp.mset[["preclinical"]]

ppclin_pie.ggplot <- ProMetIS:::.preclin_pie(Biobase::fData(ppclin.eset), y.c = "category",
                                             geom_text.ls = list(lab.i = 4, legend_text.i = 8, title.i = 18),
                                             label.c = "value")

ggplot2::ggsave("figures/article_data/ImbertEtAl_Fig2_preclinical_categories.pdf", ppclin_pie.ggplot,
                width = 7, height = 8)

3 Technical validation

For each dataset, numerical and graphical quality controls are presented.

3.1 Preclinical

The objective is to check that the WT lines from the ProMetIS study have similar phenotyping values compared to all other WT lines generated by PHENOMIN ICS.

# intensities of the WT mice in ProMetIS

ppclin_wt.eset <- ppclin.eset[, grep("W....", Biobase::sampleNames(ppclin.eset))]
ppclin_wt.mn <- Biobase::exprs(ppclin_wt.eset)

# transforming the values and computing the means by sex type

ppclin_trans.vc <- Biobase::fData(ppclin_wt.eset)[, "transformation"]

ppclin_wt.mn[ppclin_trans.vc == "inverse", ] <- 1/ppclin_wt.mn[ppclin_trans.vc == "inverse", ]
ppclin_wt.mn[ppclin_trans.vc == "log", ] <- log(ppclin_wt.mn[ppclin_trans.vc == "log", ])
ppclin_wt.mn[ppclin_trans.vc == "sqrt", ] <- sqrt(ppclin_wt.mn[ppclin_trans.vc == "sqrt", ])

ppclin_mean.mn <- t(apply(ppclin_wt.mn, 1,
                          function(feat.vn)
                            tapply(feat.vn, Biobase::pData(ppclin_wt.eset)[, "sex"],
                                   function(x) mean(x, na.rm = TRUE))))
ics.eset <- phenomis::reading(file.path(ProMetIS::processed_dir.c(), "ics"), report.c = "none")

# restricting to the WT (C57BL6/N) mice not included in the ProMetIS project
ics_wt.eset <- ics.eset[, Biobase::pData(ics.eset)[, "gene"] == "WT" &
                          !(Biobase::pData(ics.eset)[, "impc_id"] %in% Biobase::pData(ppclin.eset)[, "impc_id"])]
ics_wt.mn <- Biobase::exprs(ics_wt.eset)

message("Number of all WT PHENOMIN WT mice except those from the ProMetIS project: ", ncol(ics_wt.eset))
message("Number of males and females:")
print(table(Biobase::pData(ics_wt.eset)[, "sex"]))
## 
##   F   M 
## 539 573
ics_trans.vc <- Biobase::fData(ics_wt.eset)[, "transformation"]
message("Percentage of variables with a normal distribution (possibly following a transformation): ",
        round((1 - sum(ics_trans.vc == "non parametric") / length(ics_trans.vc)) * 100), "%")

ics_wt.mn[ics_trans.vc == "inverse", ] <- t(apply(ics_wt.mn[ics_trans.vc == "inverse", ], 1,
                                                  function(feat.vn) {
                                                    feat.vn[abs(feat.vn) < .Machine$double.eps] <- NA
                                                    1 / feat.vn
                                                  }))
ics_wt.mn[ics_trans.vc == "log", ] <- t(apply(ics_wt.mn[ics_trans.vc == "log", ], 1,
                                              function(feat.vn) {
                                                feat.vn[abs(feat.vn) < .Machine$double.eps] <- NA
                                                log(feat.vn)
                                              })) 
ics_wt.mn[ics_trans.vc == "sqrt", ] <- sqrt(ics_wt.mn[ics_trans.vc == "sqrt", ])
ics_range.mn <- matrix(0, nrow = nrow(ics_wt.mn), ncol = 4,
                       dimnames = list(rownames(ics_wt.mn), c("F_inf", "F_sup", "M_inf", "M_sup")))
sex.vc <- Biobase::pData(ics_wt.eset)[, "sex"]

ics_range.mn[ics_trans.vc == "non parametric", ] <- t(apply(ics_wt.mn[ics_trans.vc == "non parametric", ], 1,
                                                            function(feat.vn) {
                                                              c(sapply(names(table(sex.vc)),
                                                                       function(sex.c) {
                                                                         quantile(feat.vn[sex.vc == sex.c],
                                                                                  c(0.05/2, 1 - 0.05/2),
                                                                                  na.rm = TRUE)
                                                                       }))
                                                            }))
ics_range.mn[ics_trans.vc != "non parametric", ] <- t(apply(ics_wt.mn[ics_trans.vc != "non parametric", ], 1,
                                                            function(feat.vn) {
                                                              c(sapply(names(table(sex.vc)),
                                                                       function(sex.c) {
                                                                         feat_sex.vn <- feat.vn[sex.vc == sex.c]
                                                                         mean(feat_sex.vn, na.rm = TRUE) + c(-1, 1) * 2 * sd(feat_sex.vn, na.rm = TRUE)
                                                                       }))
                                                            }))
ppclin_mean.mi <- cbind(F = ics_range.mn[, "F_inf"] <= ppclin_mean.mn[, "F"] &
                          ppclin_mean.mn[, "F"] <= ics_range.mn[, "F_sup"],
                        M = ics_range.mn[, "M_inf"] <= ppclin_mean.mn[, "M"] &
                          ppclin_mean.mn[, "M"] <= ics_range.mn[, "M_sup"])

mode(ppclin_mean.mi) <- "numeric"

ppclin_pass.vl <- rowSums(ppclin_mean.mi) == 2

ppclin_unpass.mn <- cbind(F = ppclin_mean.mn[, "F"], ics_range.mn[, c("F_inf", "F_sup")],
                          M = ppclin_mean.mn[, "M"], ics_range.mn[, c("M_inf", "M_sup")])[which(is.na(ppclin_pass.vl) | !ppclin_pass.vl), , drop = FALSE]

print(ppclin_unpass.mn)
##                                          F    F_inf    F_sup        M    M_inf
## Eye_Left.vitreous.humor.thickness..µm. 566 483.6572 573.9499 560.6667 504.8498
##                                           M_sup
## Eye_Left.vitreous.humor.thickness..µm. 559.8606
# 'Gross Pathology_Body Weight (g)' contains to few values to be checked by the reference range
ppclin_pass.vl["Gross Pathology_Body Weight (g)"] <- TRUE

# 'Eye morphology (OCT + slit lamp)_Left vitreous humor thickness (µm)' is kept although the mean in males (560.6667) is slightly higher than the 2 * SD (559.86062)
ppclin_pass.vl["Eye morphology (OCT + slit lamp)_Left vitreous humor thickness (µm)"] <- TRUE

stopifnot(all(ppclin_pass.vl))

3.1.1 PCA

wt.eset <- ics.eset[, Biobase::pData(ics.eset)[, "gene"] == "WT"]
wt.pca <- ropls::opls(wt.eset, predI = 4)
## PCA
## 1127 samples x 200 variables
## standard scaling of predictors
## 31007 (14%) NAs
##       R2X(cum) pre ort
## Total    0.279   4   0

A total of 1127 WT lines and 200 features are available.

Biobase::pData(wt.eset)[, "prometis"] <- factor(ifelse(Biobase::pData(wt.eset)[, "impc_id"] %in%
                                                         Biobase::pData(ppclin.eset)[, "impc_id"],
                                                       "ProMetIS", "Other PHENOMIN"),
                                                levels = c("ProMetIS", "Other PHENOMIN"))
stopifnot(sum(Biobase::pData(wt.eset)[, "prometis"] == "ProMetIS") == 15)

The score plots in the first 4 dimensions are:

for (comp.i in 1:2)
  ropls::plot(wt.pca,
              parCompVi = c(1, 2) + 2 * (comp.i - 1),
              typeVc = "x-score")

techval_ppclin_pca.ggplot <- ProMetIS:::.preclinical_wt_scoreplot(wt.eset, wt.pca)

print(techval_ppclin_pca.ggplot)

ggplot2::ggsave(paste0("figures/article_data/ImbertEtAl_Fig5B_techval_preclin_pca.pdf"),
                techval_ppclin_pca.ggplot,
                height = 7, width = 7)

The loadings are displayed below (in particular the 3 features with most extreme values):

for (comp.i in 1:2)
  ropls::plot(wt.pca,
              parCompVi = c(1, 2) + 2 * (comp.i - 1),
              typeVc = "x-loading")

In particular, ALP and body weight have a strong influence on the first dimension (which discriminates M/F; see below).

wt_load.mn <- ropls::getLoadingMN(wt.pca)
for (comp.i in 1:4) {
  
  load.vn <- wt_load.mn[, comp.i]
  
  load_ext.vi <- c(order(load.vn)[1:3], rev(order(load.vn, decreasing = TRUE)[1:3]))
  
  print(wt_load.mn[load_ext.vi, comp.i, drop = FALSE])
  
}
##                                                                    p1
## Grip test_Weight (g)                                       -0.1854476
## Rotarod test_Body weight (g)                               -0.1847274
## Auditory startle reflex reactivity and PPI_Body weight (g) -0.1838232
## Pavlovian fear conditioning_Freezing Cue 2                  0.1017683
## Pavlovian fear conditioning_Freezing (%) Cue-2              0.1017685
## Clinical chemistry parameters_ALP (U/l)                     0.1570950
##                                                             p2
## Open field test_%Time Spent in the Center glob      -0.2538433
## Open field test_Permanence Time center (s)          -0.2538282
## Open field test_% distance in the Center glob. (cm) -0.2515523
## Open field test_av. speed center (cm/s)              0.1539070
## Open field test_Distance periphery (cm)              0.1886944
## Open field test_Permanence Time periphery (s)        0.2538322
##                                                            p3
## Open field test_Resting Time whole arena (s)       -0.2013668
## Open field test_Resting Time periphery (s)         -0.1922035
## Pavlovian fear conditioning_freezing total context -0.1364169
## Open field test_Dist I2 (cm)                        0.2131457
## Open field test_Distance whole arena (cm)           0.2396825
## Open field test_Dist global (cm)                    0.2396825
##                                                           p4
## Auditory startle reflex reactivity and PPI_Gl     -0.2093506
## Auditory startle reflex reactivity and PPI_% PP75 -0.2054430
## Grip test_Grip mean (4paws) (g)                   -0.1865101
## Auditory startle reflex reactivity and PPI_PP85    0.1765333
## Auditory startle reflex reactivity and PPI_PP90    0.1783836
## Auditory startle reflex reactivity and PPI_PP75    0.1844418

The first axis is related to weight, and Glucose, HDL, Albumin and Alkalyne Phosphatase:

for (feat.c in c("Grip test_Weight (g)",
                 "GTT_Glucose T120 (mmol/l)",
                 "Clinical chemistry parameters_HDL Chol. (mmol/l)",
                 "Clinical chemistry parameters_Albumin (g/l)",
                 "Clinical chemistry parameters_ALP (U/l)")) {
ropls::plot(wt.pca,
            typeVc = "x-score",
            parAsColFcVn = Biobase::exprs(wt.eset)[feat.c, ],
            parCompVi = c(1, 2),
            parLabVc = rep("x", dim(wt.eset)["Samples"]),
            parTitleL = FALSE)
title(feat.c)
}

In particular, we observe that mice tend to be separated according to sex along the first axis:

ropls::plot(wt.pca, typeVc = "x-score", parAsColFcVn = Biobase::pData(wt.eset)[, "sex"])

The second axis is related to behavior (permanence time in periphery):

for (feat.c in c("Open field test_Permanence Time periphery (s)")) {
  ropls::plot(wt.pca,
              typeVc = "x-score",
              parAsColFcVn = Biobase::exprs(wt.eset)[feat.c, ],
              parCompVi = c(1, 2),
              parLabVc = rep("x", dim(wt.eset)["Samples"]),
              parTitleL = FALSE)
  title(feat.c)
}

The two clusters on the 3-4 hyperplane are related to Haematology (RBC: red blood cell count; HGB: hemoglobin; HCT: hematocrit).

for (feat.c in c("Haematology - Complete blood count (CBC)on the Advia 120_RBC (x10E06 cells/µL)",
                 "Haematology - Complete blood count (CBC)on the Advia 120_HGB (g/dL)",
                 "Haematology - Complete blood count (CBC)on the Advia 120_HCT (%)")) {
  ropls::plot(wt.pca,
              typeVc = "x-score",
              parAsColFcVn = Biobase::exprs(wt.eset)[feat.c, ],
              parCompVi = c(3, 4),
              parLabVc = rep("x", dim(wt.eset)["Samples"]),
              parTitleL = FALSE)
  title(feat.c)
}

The orthogonal variation is related to behavior (global distance):

for (feat.c in c("Open field test_Dist global (cm)")) {
  ropls::plot(wt.pca,
              typeVc = "x-score",
              parAsColFcVn = Biobase::exprs(wt.eset)[feat.c, ],
              parCompVi = c(3, 4),
              parLabVc = rep("x", dim(wt.eset)["Samples"]),
              parTitleL = FALSE)
  title(feat.c)
}

3.2 Proteomics

techval_proteo_dir.c <- "../supplementary/technical_validation/proteomics"

# liver
prot_liver_file.c <- file.path(techval_proteo_dir.c,
                               "Données pour figures Strasbourg QC.xlsx")

# plasma
prot_plasma_file.c <- file.path(techval_proteo_dir.c,
                                "figureQC_proteomique.xlsx")

3.2.1 iRT

# liver
irt_liver.tb <- suppressMessages(readxl::read_excel(prot_liver_file.c,
                                                     sheet = 1))

irt_liver.df <- as.data.frame(irt_liver.tb,
                               stringsAsFactors = FALSE)

irt_liver.mn <- as.matrix(irt_liver.df[-1, -1])
mode(irt_liver.mn) <- "numeric"
rownames(irt_liver.mn) <- irt_liver.df[, 1][-1]

pept_liver.vc <- rownames(irt_liver.mn)
rownames(irt_liver.mn) <- paste0("P", 1:nrow(irt_liver.mn))

# plasma

irt_plasma.tb <- suppressMessages(readxl::read_excel(prot_plasma_file.c,
                                                      sheet = 2))

irt_plasma.df <- as.data.frame(irt_plasma.tb,
                                stringsAsFactors = FALSE)

irt_plasma.mn <- as.matrix(irt_plasma.df[3:13, 21:65])
mode(irt_plasma.mn) <- "numeric"
rownames(irt_plasma.mn) <- irt_plasma.df[, 2][3:13]
pept_plasma.vc <- rownames(irt_plasma.mn)
stopifnot(identical(pept_liver.vc, pept_plasma.vc))
rownames(irt_plasma.mn) <- rownames(irt_liver.mn)

3.2.2 CV

# liver
cv_liver.tb <- suppressMessages(readxl::read_excel(prot_liver_file.c,
                                                     sheet = 2,
                                                     range = "A1:E2469"))

cv_liver.df <- as.data.frame(cv_liver.tb,
                             stringsAsFactors = FALSE)

cv_liver.df <- cbind.data.frame(type = factor(rep(c("Pool", "WT", "LAT", "MX2"),
                                                  each = nrow(cv_liver.df)),
                                              levels = c("Pool", "WT", "LAT", "MX2")),
                                value = c(cv_liver.df[, 2], cv_liver.df[, 3],
                                          cv_liver.df[, 4], cv_liver.df[, 5]))

# plasma
cv_plasma.tb <- suppressMessages(readxl::read_excel(prot_plasma_file.c,
                                                      sheet = 4,
                                                      range = "A1:BT622"))

cv_plasma.df <- as.data.frame(cv_plasma.tb,
                                stringsAsFactors = FALSE)

cv_plasma.df <- cbind.data.frame(type = factor(rep(c("WT", "MX2", "LAT", "Pool"),
                                                   each = nrow(cv_plasma.df)),
                                               levels = c("Pool", "WT", "LAT", "MX2")),
                                 value = c(cv_plasma.df[, 69], cv_plasma.df[, 70],
                                           cv_plasma.df[, 71], cv_plasma.df[, 72]))

3.2.3 Plotting

irt_liver.gg <- ProMetIS:::.irt_plot(irt_liver.mn, "liver")
irt_plasma.gg <- ProMetIS:::.irt_plot(irt_plasma.mn, "plasma")
cv_liver.gg <- ProMetIS:::.cv_ggplot(cv_liver.df, "liver")
cv_plasma.gg <- ProMetIS:::.cv_ggplot(cv_plasma.df, "plasma")
prot_qc_plot <- gridExtra::grid.arrange(irt_liver.gg,
                                        cv_liver.gg,
                                        irt_plasma.gg,
                                        cv_plasma.gg,
                                        nrow = 2, ncol = 2,
                                        widths = c(4, 2),
                                        heights = c(3, 3))

ggplot2::ggsave("figures/article_data/ImbertEtAl_Fig6_techval_proteo_irt_cv.pdf", prot_qc_plot, width = 12, height = 7)

3.3 Metabolomics

Loading the processed datasets:

p.mset <- phenomis::reading(ProMetIS::processed_dir.c(),
                            report.c = "none")
pmetabo.mset <- p.mset[, ProMetIS::metabo_sets.vc()]

Post-processing in the ‘technical validation’ mode (keeping standards, keeping pools, omitting the pool_CV <= 30% and pool_CV_over_sample_CV <= 1 filters)

pmsmetabo.mset <- ProMetIS:::.metabo_postprocessing(metabo.mset = pmetabo.mset,
                                                   drift_correct.c = "prometis",
                                                   .technical_validation.l = TRUE)
## [1] "metabolomics_liver_c18hypersil_pos"
## [1] "metabolomics_liver_hilic_neg"
## [1] "metabolomics_plasma_c18hypersil_pos"

## [1] "metabolomics_plasma_hilic_neg"

## [1] "metabolomics_plasma_c18acquity_pos"

## [1] "metabolomics_plasma_c18acquity_neg"

# save(pmsmetabo.mset, file = "../supplementary/technical_validation/metabolomics/metabo_mset.rdata")
# load("../supplementary/technical_validation/metabolomics/metabo_mset.rdata")

3.3.1 Standards

standards_info.vc <- c("2-Aminoanthracene [ExS]_#0000FF",
                       "Alanine 13C [ExS]_#0040FF",
                       "Amiloride [ExS]_#0080FF",
                       "Aspartate 15N [ExS]_#00BFFF",
                       "Atropine [ExS]_#00FFFF",
                       "Colchicine [ExS]_#00FFBF", 
                       "Dihydrostreptomycin [ExS]_#00FF80",
                       "Ethylmalonic acid [ExS]_#00FF40",
                       "Glucose 13C [ExS]_#00FF00",
                       "Imipramine [ExS]_#40FF00",
                       "Metformin [ExS]_#80FF00",
                       "Prednisone [ExS]_#BFFF00",
                       "Roxithromycin (fragment) [ExS]_#FFFF00",
                       "AMPA [IS]_#FFBF00",
                       "Dimetridazole [IS]_#FF8000",
                       "Dinoseb [IS]_#FF4000",
                       "MCPA [IS]_#FF0000")
standards_type.vc <-  sapply(standards_info.vc,
                     function(info.c)
                       unlist(strsplit(info.c, "_"))[1])
standards_name.vc <-  sapply(standards_type.vc,
                     function(standard.c)
                       unlist(strsplit(standard.c, " [", fixed = TRUE))[1])
palette.vc <- sapply(standards_info.vc,
                     function(info.c)
                       unlist(strsplit(info.c, "_"))[2])
palette.vc <- c(RColorBrewer::brewer.pal(8, "Dark2"),
                RColorBrewer::brewer.pal(9, "Set1")[c(1:5, 7:9)],
                RColorBrewer::brewer.pal(3, "Set2")[1])
names(palette.vc) <- standards_name.vc

out_sample.ls <- standards_cv.ls <- list()

for (set.c in grep("(c18hyper|hilic)", names(pmsmetabo.mset), value = TRUE)) {
  
  # set.c <- grep("(c18hyper|hilic)", names(pmsmetabo.mset), value = TRUE)[3]

  eset <- pmsmetabo.mset[[set.c]]
  eset <- eset[Biobase::fData(eset)[, "standard"] != "",
               Biobase::pData(eset)[, "sampleType"] %in% c("pool", "sample")]
  
  #ggplot needs a dataframe
  exprs.mn <- Biobase::exprs(eset)
  
  tlog2exprs.df <- as.data.frame(t(log2(1 + exprs.mn)))
  colnames(tlog2exprs.df) <- Biobase::fData(eset)[, "standard"]
 
  # re-ordering
  tlog2exprs.df <- tlog2exprs.df[, standards_type.vc[standards_type.vc %in% colnames(tlog2exprs.df)]]

  # computing CVs
  standards_cv.ls[[set.c]] <- apply(as.matrix(tlog2exprs.df), 2, function(x) sd(x) / mean(x))
  
  #id variable for position in matrix 
  tlog2exprs.df$id <- 1:nrow(tlog2exprs.df) 
  #reshape to long format
  ggplot.df <- reshape2::melt(tlog2exprs.df, id.var = "id")
  
  ggplot.df[, "sample"] <- factor(rep(gsub("pool1", "pool",
                                           Biobase::pData(eset)[, "sampleType"]),
                                      ncol(tlog2exprs.df) - 1),
                                  levels = c("pool",
                                             "sample"))
  
  ggplot.df[, "sample_name"] <- rep(Biobase::sampleNames(eset),
                                    ncol(tlog2exprs.df) - 1)
  
  ggplot.df[, "compound_standard"] <- ggplot.df[, "variable"]
  ggplot.df[, "compound"] <- vapply(as.character(ggplot.df[, "compound_standard"]),
                                           function(comp.c) {
                                             bracket.i <- which(unlist(strsplit(comp.c, split = "")) == "[")
                                             substr(comp.c, 1, bracket.i - 2)
                                           }, character(1))
  ggplot.df[, "standard"] <- vapply(as.character(ggplot.df[, "compound_standard"]),
                                    function(comp.c) {
                                      bracket.i <- which(unlist(strsplit(comp.c, split = "")) == "[")
                                      substr(comp.c, bracket.i, nchar(comp.c))
                                    }, character(1))
  ggplot.df[, "injectionOrder"] <- ggplot.df[, "id"]
  ggplot.df[, "intensity"] <- ggplot.df[, "value"]
  
  ggplot.df[, "mean"] <- rep(tapply(ggplot.df[, "intensity"], ggplot.df[, "variable"], mean), table(ggplot.df[, "variable"]))
  ggplot.df[, "sd"] <- rep(tapply(ggplot.df[, "intensity"], ggplot.df[, "variable"], sd), table(ggplot.df[, "variable"]))
  ggplot.df[, "lcl"] <- ggplot.df[, "mean"] - 2 * ggplot.df[, "sd"]
    ggplot.df[, "ucl"] <- ggplot.df[, "mean"] + 2 * ggplot.df[, "sd"]

  ggplot.df[, "zscore"] <- (ggplot.df[, "intensity"] - ggplot.df[, "mean"]) / ggplot.df[, "sd"]
    
  ggplot.df[, "pval"] <- format(2 * (1 - pnorm(abs(ggplot.df[, "zscore"]))), scientific = TRUE)

  out_sample.vl <- abs(ggplot.df[, "zscore"]) > 3
  if (sum(out_sample.vl)) {
    out_sample.ls[[set.c]] <- ggplot.df[out_sample.vl, c("sample_name",
                                                         "compound_standard",
                                                         "injectionOrder",
                                                         "intensity",
                                                         "lcl",
                                                         "ucl",
                                                         "zscore",
                                                         "pval"), drop = FALSE]
  } else
    out_sample.ls[[set.c]] <- data.frame()
  
  #plot
  standards.ggplot <- ggplot2::ggplot(ggplot.df,
                                      ggplot2::aes(x = injectionOrder,
                                                   y = intensity,
                                                   group = compound,
                                                   colour = compound)) +
    ggplot2::geom_point(ggplot2::aes(shape = sample)) +
    ggplot2::scale_shape_manual(values = c(5, 18)) +
    ggplot2::geom_line(ggplot2::aes(lty = standard), size = 0.5) +
    ggplot2::scale_linetype_manual(values = c("solid", "dotted"))+
    # ggplot2::geom_line(ggplot2::aes(x = injectionOrder, y = mean, colour = compound), lty = "solid", size = 0.5) +
    ggplot2::geom_ribbon(ggplot2::aes(x = injectionOrder, ymin = lcl, ymax = ucl, group = compound,
                                      fill = compound), linetype = "dashed", alpha = 0.1) +
    ggplot2::labs(title = gsub("metabolomics_", "", set.c),
                  x = "Injection order", y = "Intensity (log2)") +
    ggplot2::scale_color_manual(values = palette.vc[unique(ggplot.df[, "compound"])]) +
    ggplot2::scale_fill_manual(values = palette.vc[unique(ggplot.df[, "compound"])]) +
    # ggplot2::scale_color_brewer(palette = "Set3") +
    # ggplot2::scale_fill_brewer(palette = "Set3") +
    ggplot2::coord_cartesian(ylim = c(13, 28)) +
    ggplot2::theme_light() +
    ggplot2::theme(plot.title = ggplot2::element_text(size = 20, face = "bold"),
                   axis.text = ggplot2::element_text(size = 17),
                   axis.title = ggplot2::element_text(size = 17, face = "bold"),
                   legend.title = ggplot2::element_text(size = 17, face = "bold"),
                   legend.text = ggplot2::element_text(size = 17))
    # ggplot2::geom_rug(sides = "b", mapping = ggplot2::aes(group = sample))
  
  ggplot2::ggsave(paste0("figures/article_data/ImbertEtAl_FigS5_metabo_standards_",
                         gsub("metabolomics_", "", set.c), ".pdf"),
                  standards.ggplot, width = 12)

}

print(round(c(min = min(unlist(standards_cv.ls)),
              mean = mean(unlist(standards_cv.ls)),
              max = max(unlist(standards_cv.ls)),
              sd = sd(unlist(standards_cv.ls))) * 100))
##  min mean  max   sd 
##    0    2    5    1
print(out_sample.ls)
## $metabolomics_liver_c18hypersil_pos
##                sample_name   compound_standard injectionOrder intensity
## 60  X20180906_827F_pos_021   Alanine 13C [ExS]              8  13.44732
## 178 X20180906_818F_pos_035 Aspartate 15N [ExS]             22  21.13316
## 306 X20180906_627F_pos_060    Imipramine [ExS]             46  24.30825
## 358 X20180906_627F_pos_060    Prednisone [ExS]             46  21.59884
##          lcl      ucl    zscore         pval
## 60  14.61297 17.75892 -3.482105 4.974895e-04
## 178 19.49298 20.69972  3.436734 5.887746e-04
## 306 24.57520 25.34977 -3.378586 7.285958e-04
## 358 22.21808 23.29546 -4.299067 1.715190e-05
## 
## $metabolomics_liver_hilic_neg
##                 sample_name compound_standard injectionOrder intensity      lcl
## 38   X20180910_629F_neg_061   Amiloride [ExS]             38  15.10089 17.56935
## 48  X20180910_QC.EI_neg_071   Amiloride [ExS]             48  15.19781 17.56935
## 189  X20180910_634F_neg_056 Glucose 13C [ExS]             33  14.64951 17.47429
##          ucl    zscore         pval
## 38  21.39874 -4.578437 4.684638e-06
## 48  21.39874 -4.477195 7.563028e-06
## 189 20.95346 -5.247646 1.540546e-07
## 
## $metabolomics_plasma_c18hypersil_pos
##                sample_name         compound_standard injectionOrder intensity
## 126 X20180906_818P_pos_035       Aspartate 15N [ExS]             22  19.72965
## 210 X20180906_617P_pos_015          Colchicine [ExS]              2  21.59074
## 262 X20180906_617P_pos_015 Dihydrostreptomycin [ExS]              2  22.40049
## 366 X20180906_617P_pos_015          Imipramine [ExS]              2  24.40131
## 542 X20180906_818P_pos_035        Dimetridazole [IS]             22  26.20310
## 552 X20180906_836P_pos_046        Dimetridazole [IS]             32  26.25760
##          lcl      ucl    zscore         pval
## 126 18.49164 19.38674  3.532401 4.118048e-04
## 210 21.98452 23.04404 -3.486650 4.891107e-04
## 262 22.62489 23.43031 -3.114410 1.843131e-03
## 366 24.71776 25.80387 -3.165454 1.548410e-03
## 542 26.40534 26.99094 -3.381358 7.212841e-04
## 552 26.40534 26.99094 -3.009107 2.620172e-03
## 
## $metabolomics_plasma_hilic_neg
##                sample_name compound_standard injectionOrder intensity      lcl
## 4   X20180910_513P_neg_027   Amiloride [ExS]              4  15.28016 16.82327
## 269 X20180910_631P_neg_032      Dinoseb [IS]              9  27.25426 27.48381
## 340 X20180910_527P_neg_051         MCPA [IS]             28  25.41963 25.71633
##          ucl    zscore         pval
## 4   20.37539 -3.737676 1.857294e-04
## 269 28.02777 -3.687924 2.260913e-04
## 340 26.32446 -3.951528 7.765389e-05
print(max(abs(Reduce("rbind.data.frame", out_sample.ls)[, "zscore"])))
## [1] 5.247646
sapply(out_sample.ls, nrow)
##  metabolomics_liver_c18hypersil_pos        metabolomics_liver_hilic_neg 
##                                   4                                   3 
## metabolomics_plasma_c18hypersil_pos       metabolomics_plasma_hilic_neg 
##                                   6                                   3
for (set.c in names(out_sample.ls)) {
eset <- pmsmetabo.mset[[set.c]]
eset.pca <- ropls::opls(eset, fig.pdfC = "none", info.txtC = "none", predI = 4)
ropls::plot(eset.pca,
            typeVc = "x-score",
            parAsColFcVn = ifelse(Biobase::sampleNames(eset) %in% out_sample.ls[[set.c]][, "sample_name"], 1, 0),
            parTitleL = FALSE)
title(gsub("metabolomics_", "", set.c))
}

3.3.2 Quality metrics

quality_metrics.ls <- ProMetIS:::.metabo_quality_metrics(pmsmetabo.mset)

# save(quality_metrics.ls, file = "../supplementary/technical_validation/metabolomics/quality_metrics_ls.rdata")
# load("../supplementary/technical_validation/metabolomics/quality_metrics_ls.rdata")
quality_table.ls <- list()

for (method.c in c("none", "pool", "sample", "prometis")) {
  
  load(paste0("../supplementary/technical_validation/metabolomics/quality_metrics_ls",
              ifelse(method.c != "prometis", paste0("_", method.c), ""),
              ".rdata"))
  
  quality.mn <- quality_metrics.ls[["quality.mn"]]
  
  quality.mn <- t(quality.mn)[, c("correl_inj_test",
                                  "correl_inj_pca",
                                  "pool_spread_pca",
                                  "poolCV_0.3",
                                  "ICCpool")]
  colnames(quality.mn) <- c("Drift Spearman", "Drift PCA", "QC spread", "QC CV", "QC ICC")
  
  for (metric.c in c("Drift Spearman", "Drift PCA", "QC spread"))
    quality.mn[, metric.c] <- 100 - quality.mn[, metric.c]
  
  quality.mn <- round(quality.mn)
  
  quality_table.ls[[method.c]] <- quality.mn
  
}

min_quality.mn <- t(sapply(quality_table.ls, function(matrix.mn) {
  c(drift = min(matrix.mn[, 1:2]), qc = min(matrix.mn[, 3:5]))
}))
rownames(min_quality.mn) <- c("None", "Pools", "Samples", "ProMetIS")

plot(min_quality.mn, type = "n", xlim = c(0, 100), ylim = c(0, 100),
     xlab = "Drift metrics", ylab = "QC metrics",
     main = "Minimum quality metric values\ndepending on the correction method")
text(min_quality.mn, labels = rownames(min_quality.mn))

write.table(quality_table.ls[["prometis"]],
                col.names = NA,
                file = "figures/article_data/ImbertEtAl_Table1_techval_metabo_metrics.tsv",
                sep = "\t")

rowMeans(quality_table.ls[["prometis"]])
##  liver_c18hypersil_pos        liver_hilic_neg plasma_c18hypersil_pos 
##                   85.2                   89.0                   92.2 
##       plasma_hilic_neg  plasma_c18acquity_pos  plasma_c18acquity_neg 
##                   93.8                   83.8                   88.2
quality_ggparcoord.ls <- quality_ggradar.ls <- list()

for (method.c in c("none", "pool", "sample", "prometis")) {
  
  quality.df <- as.data.frame(quality_table.ls[[method.c]])
  
  quality.df <- cbind.data.frame(group = factor(rownames(quality.df),
                                                levels = rownames(quality.df)),
                                 quality.df)
  
  quality_ggradar.ls[[method.c]] <- ggradar::ggradar(quality.df,
                                                     base.size = 15,
                                                     values.radar = c("0", "50", "100"),
                                                     grid.min = 0, grid.mid = 50, grid.max = 100,
                                                     # Polygones
                                                     group.line.width = 2, 
                                                     group.point.size = 3,
                                                     group.colours = RColorBrewer::brewer.pal(9, "Set1")[c(1:5, 7)],
                                                     # Background
                                                     background.circle.colour = "white",
                                                     gridline.mid.colour = "grey",
                                                     # Legend
                                                     plot.title = method.c,
                                                     legend.text.size = 15,
                                                     legend.position = "right")
  
  quality_ggparcoord.ls[[method.c]] <- GGally::ggparcoord(quality.df, columns = c(3, 2, 4:6),
                                                          groupColumn = 1,
                                                          scale = "globalminmax") +
    ggplot2::geom_line(size = 2) +
    ggplot2::labs(title = "",
                  x = "", y = "Score") +
    ggplot2::scale_color_manual(values = RColorBrewer::brewer.pal(9, "Set1")[c(1:5, 7)]) +
    ggplot2::coord_cartesian(ylim = c(0, 100)) +
    ggplot2::theme_light() +
    ggplot2::theme(plot.title = ggplot2::element_text(size = 20, face = "bold"),
                   axis.text = ggplot2::element_text(size = 17),
                   axis.title = ggplot2::element_text(size = 17, face = "bold"),
                   legend.position = "bottom",
                   legend.title = ggplot2::element_blank(),
                   legend.text = ggplot2::element_text(size = 17))
  
}

ggplot2::ggsave("figures/article_data/ImbertEtAl_Fig10A_techval_metabo_radar.pdf",
                quality_ggradar.ls[["prometis"]],
                height = 7, width = 10.5)
ggplot2::ggsave("figures/article_data/ImbertEtAl_Fig10Abis_techval_metabo_parallel.pdf",
                quality_ggparcoord.ls[["prometis"]],
                height = 7, width = 10.5)

for (method.c in setdiff(names(quality_ggradar.ls), "prometis")) {
  ggplot2::ggsave(paste0("figures/article_data/ImbertEtAl_FigS6_techval_metabo_radar_", method.c, ".pdf"),
                  quality_ggradar.ls[[method.c]],
                  height = 7, width = 10.5)
  ggplot2::ggsave(paste0("../supplementary/technical_validation/metabolomics/ImbertEtAl_FigS6bis_techval_metabo_parallel_", method.c, ".pdf"),
                  quality_ggparcoord.ls[[method.c]],
                  height = 7, width = 10.5)
}

3.3.3 Score plots

scoreplot.ls <- vector(mode = "list", length = length(ProMetIS::metabo_sets.vc()))
names(scoreplot.ls) <- ProMetIS::metabo_sets.vc()

for (set.c in names(pmsmetabo.mset)) {
  
  pmsmetabo.eset <- pmsmetabo.mset[[set.c]]
  
  pmsmetabo.eset <- pmsmetabo.eset[, Biobase::pData(pmsmetabo.eset)[, "sampleType"] %in% c("pool", "sample")]
  
  pdata.df <- Biobase::pData(pmsmetabo.eset)
  pdata.df[, "order"] <- pdata.df[, "injectionOrder"]
  if (grepl("acqui", set.c))
    pdata.df[, "order"] <- pdata.df[, "order"] - min(pdata.df[, "order"]) + 1
  pdata.df[, "label"] <- ifelse(pdata.df[, "sampleType"] == "pool", "QC", "s")
  Biobase::pData(pmsmetabo.eset) <- pdata.df
  
  pmsmetabo.pca <- ropls::opls(pmsmetabo.eset, predI = 2, fig.pdfC = "none", info.txtC = "none")
  
  scoreplot.ls[[set.c]] <- ProMetIS:::score_plotly(pmsmetabo.pca,
                                                   label.c = "label",
                                                   color.c = "order",
                                                   title.c = gsub("metabolomics_", "", set.c),
                                                   figure.c = "interactive",
                                                   size.ls = list(axis_lab.i = 12,
                                                                  axis_text.i = 10,
                                                                  point.i = 2,
                                                                  label.i = 4,
                                                                  title.i = 15,
                                                                  legend_title.i = 10,
                                                                  legend_text.i = 10),
                                                   plot.l = FALSE)
  
}

scoreplots.p <- gridExtra::grid.arrange(grobs = scoreplot.ls[c("metabolomics_liver_c18hypersil_pos",
                                                               "metabolomics_plasma_c18hypersil_pos",
                                                               "metabolomics_plasma_c18acquity_pos",
                                                               "metabolomics_liver_hilic_neg",
                                                               "metabolomics_plasma_hilic_neg",
                                                               "metabolomics_plasma_c18acquity_neg")],
                                        nrow = 2)

ggplot2::ggsave(paste0("figures/article_data/ImbertEtAl_Fig7B_techval_metabo_pca.pdf"), scoreplots.p,
                height = 7, width = 10.5)

3.3.4 CV

cv_ggplot.ls <- vector(mode = "list", length = length(names(pmsmetabo.mset)))
names(cv_ggplot.ls) <- names(pmsmetabo.mset)

for (set.c in names(pmsmetabo.mset)) {
  
  # set.c <- names(pmsmetabo.mset)[2]
  pmsmetabo.eset <- pmsmetabo.mset[[set.c]]
  pmsmetabo.eset <- pmsmetabo.eset[, Biobase::pData(pmsmetabo.eset)[, "sampleType"] %in% c("pool", "sample")]
  
  genotype.vc <- tolower(substr(Biobase::pData(pmsmetabo.eset)[,
                                                     ifelse(grepl("(hyper|hilic)", set.c),
                                                            "KO", "Type")], 1, 1))
  
  type.vc <- vapply(seq_len(Biobase::dims(pmsmetabo.eset)["Samples", 1]),
                    function(sample.i) {
                      if (Biobase::pData(pmsmetabo.eset)[sample.i, "sampleType"] == "pool") {
                        return("Pool")
                      } else if (genotype.vc[sample.i] == "l") {
                        return("LAT")
                      } else if (genotype.vc[sample.i] == "m") {
                        return("MX2")
                      } else {
                        return("WT")
                      }
                    }, FUN.VALUE = character(1))
  cv.mn <- 100 * as.matrix(t(apply(Biobase::exprs(pmsmetabo.eset), 1,
                                   function(feat.vn) {
                                     tapply(feat.vn, type.vc,
                                            function(x) sd(x, na.rm = TRUE) / mean(x, na.rm = TRUE))
                                   })))
  
  cv.df <- cbind.data.frame(type = factor(rep(c("Pool", "WT", "LAT", "MX2"),
                                              each = nrow(cv.mn)),
                                          levels = c("Pool", "WT", "LAT", "MX2")),
                            value = c(cv.mn[, "Pool"], cv.mn[, "WT"],
                                      cv.mn[, "LAT"], cv.mn[, "MX2"]))
  
  cv_ggplot.ls[[set.c]] <- ProMetIS:::.cv_ggplot(cv.df, title.c = gsub("metabolomics_", "", set.c))
  
}

metabo_cv_plot <- gridExtra::marrangeGrob(cv_ggplot.ls,
                                          nrow = 2, ncol = 3)

ggplot2::ggsave(paste0("figures/article_data/ImbertEtAl_FigS9_metabo_cv_boxplots.pdf"), metabo_cv_plot,
                height = 7, width = 10.5)
pdf("figures/article_data/ImbertEtAl_FigS7_metabo_cv_cumulative.pdf",
    height = 7, width = 10.5)

ProMetIS:::.metabo_quality_plot(quality_metrics.ls = quality_metrics.ls, "cv_percent")
## [1] "liver_c18hypersil_pos"  "liver_hilic_neg"        "plasma_c18hypersil_pos"
## [4] "plasma_hilic_neg"       "plasma_c18acquity_pos"  "plasma_c18acquity_neg"
dev.off()
## png 
##   2

3.3.5 ICC

pdf("figures/article_data/ImbertEtAl_FigS8_metabo_icc_cumulative.pdf",
    height = 7, width = 10.5)
    
ProMetIS:::.metabo_quality_plot(quality_metrics.ls = quality_metrics.ls, "icc_intens")
## [1] "liver_c18hypersil_pos"  "liver_hilic_neg"        "plasma_c18hypersil_pos"
## [4] "plasma_hilic_neg"       "plasma_c18acquity_pos"  "plasma_c18acquity_neg"
dev.off()
## png 
##   2

3.4 Sex impact

ppstat.mset <- phenomis::hypotesting(pp.mset,
                                     test.c = "limma",
                                     factor_names.vc = "sex",
                                     figure.c = "none",
                                     report.c = "none")

message("Percentage of features with significant differences between males and females:")
ppstat.sex.vn <- sapply(Biobase::fData(ppstat.mset),
                        function(fdata.df) {
                          round(sum(fdata.df[, "limma_sex_F.M_signif"], na.rm = TRUE) / nrow(fdata.df) * 100)
                        })
ppstat.sex.mn <- as.matrix(ppstat.sex.vn)
colnames(ppstat.sex.mn) <- "%"
print(ppstat.sex.mn)
##                                      %
## preclinical                         34
## proteomics_liver                    40
## proteomics_plasma                   30
## metabolomics_liver_c18hypersil_pos  14
## metabolomics_liver_hilic_neg        14
## metabolomics_plasma_c18hypersil_pos 20
## metabolomics_plasma_hilic_neg       24
## metabolomics_plasma_c18acquity_pos  37
## metabolomics_plasma_c18acquity_neg  37
message("Range: ", paste(range(ppstat.sex.vn), collapse = ", "))

3.4.1 on intensities

int_range_ggplot.ls <- list()

for (set.c in setdiff(names(pp.mset), "preclinical")) {
  # set.c <- "proteomics_liver"
  pp.eset <- pp.mset[[set.c]]
  pp_exprs.mn <- Biobase::exprs(pp.eset)
  pp_pdata.df <- Biobase::pData(pp.eset)
  
  pp_exprs.tb <- tidyr::as_tibble(pp_exprs.mn)
  int_range.df <- as.data.frame(tidyr::pivot_longer(pp_exprs.tb,
                                                    cols = 1:ncol(pp_exprs.tb),
                                                    names_to = "sample",
                                                    values_to = "intensity"))
  
  int_range.df[, "gene"] <- factor(pp_pdata.df[int_range.df[, "sample"], "gene"],
                                   levels = c("WT", "LAT", "MX2"))
  int_range.df[, "sex"] <- factor(pp_pdata.df[int_range.df[, "sample"], "sex"],
                                  levels = c("M", "F"))
  
  int_samp_order.vc <- rownames(pp_pdata.df[order(factor(pp_pdata.df[, "gene"], levels = c("WT", "LAT", "MX2"))), ])
  int_range.df[, "sample"] <- factor(int_range.df[, "sample"],
                                     levels = int_samp_order.vc)
  
  
  p <- ggplot2::ggplot(int_range.df,
                       ggplot2::aes(x = sample, y = intensity, fill = sex, col = gene)) +
    ggplot2::geom_boxplot(lwd = 1, outlier.size = 1) +
    ggplot2::scale_color_manual(values = RColorBrewer::brewer.pal(9, "Set1")[-1]) +
    ggplot2::scale_fill_manual(values = c(M = "lightblue2",
                                          F = "lightpink1")) +
    ggplot2::labs(title = set.c, x = "Samples", y = "Intensity (log2)") +
    ggplot2::theme_light() +
    ggplot2::theme(legend.text = ggplot2::element_text(size = 11, face = "bold"),
                   legend.title = ggplot2::element_blank(),
                   axis.text = ggplot2::element_text(size = 11, face = "bold"),
                   axis.text.x = ggplot2::element_text(size = 11, angle = 90),
                   axis.title = ggplot2::element_text(size = 13, face = "bold"),
                   plot.title = ggplot2::element_text(size = 15, face = "bold"))
  
  int_range_ggplot.ls[[set.c]] <- p
  
}

int_range.gg <- gridExtra::grid.arrange(grobs = int_range_ggplot.ls,
                                        nrow = length(ProMetIS::sets.vc()) - 1, ncol = 2)

ggplot2::ggsave("figures/article_data/ImbertEtAl_FigS10_intensity_range.pdf", int_range.gg,
                width = 14, height = 35)

3.4.2 on CVs

Comparing the CV values from the raw protein intensities provided by the partners and used in Figure 6 and from the processed data in the ProMetIS package:

cv_compare_gg.ls <- vector("list", length = 2)
names(cv_compare_gg.ls) <- ProMetIS::tissues.vc()

for (tissue.c in ProMetIS::tissues.vc()) {

  # tissue.c <- "liver"
  
  # cv_liver.df and cv_plasma.df tables provided by the partners and read above (line 451 and following)
    
  cv_partner.df <- eval(parse(text = paste0("cv_", tissue.c, ".df")))
  
  cv_compare_gg.ls[[tissue.c]][["partner"]] <- ProMetIS:::.cv_ggplot(cv_partner.df, "liver [partner]")
  
  # tables obtained from the processed datasets loaded above (line 550 and following)
  
  eset <- pp.mset[[paste0("proteomics_", tissue.c)]]
  gene.fc <- factor(Biobase::pData(eset)[, "gene"],
                    levels = c("WT", "LAT", "MX2"))
  cv_processed.mn <- t(apply(2^Biobase::exprs(eset), 1,
                       function(feat.vn) {
                         tapply(feat.vn, gene.fc,
                                function(x) sd(x, na.rm = TRUE) / mean(x, na.rm = TRUE))
                       })) * 100
  cv_processed.df <- tidyr::gather(as.data.frame(cv_processed.mn),
                             key = "gene", value = "cv")
  
  cv_compare_gg.ls[[tissue.c]][["processed"]] <- ProMetIS:::.cv_ggplot(cv_processed.df, "liver [processed]",
                                           color.vc = RColorBrewer::brewer.pal(9, "Set1")[-1])
  
}

cv_compare_liver.gg <- gridExtra::grid.arrange(grobs = cv_compare_gg.ls[["liver"]],
                                               nrow = 1, ncol = 2)

cv_compare_plasma.gg <- gridExtra::grid.arrange(grobs = cv_compare_gg.ls[["plasma"]],
                                               nrow = 1, ncol = 2)

Plotting the male and female CV separately for each conditions, as computed on the processed data:

ppcv_ggplot.ls <- list()

for (set.c in names(pp.mset)) {
  
  pp.eset <- pp.mset[[set.c]]
  
  ppexprs.mn <- Biobase::exprs(pp.eset)
  
  pp_genesex.vc <- paste0(Biobase::pData(pp.eset)[, "gene"], "_",
                          Biobase::pData(pp.eset)[, "sex"])
  
  ppcv.mn <- t(apply(ppexprs.mn, 1,
                     function(feat.vn) {
                       tapply(feat.vn, pp_genesex.vc, function(x) sd(x, na.rm = TRUE) / mean(x, na.rm = TRUE) * 100)
                     }))
  
  ppcv.df <- as.data.frame(tidyr::pivot_longer(tidyr::as_tibble(ppcv.mn),
                                               cols = 1:ncol(ppcv.mn),
                                               names_to = "features",
                                               values_to = "CV"))
  ppcv.df[, "gene"] <- factor(sapply(ppcv.df[, "features"],
                                     function(feat.c)
                                       unlist(strsplit(feat.c, "_"))[1]),
                              levels = c("WT", "LAT", "MX2"))
  ppcv.df[, "sex"] <- factor(sapply(ppcv.df[, "features"],
                                    function(feat.c)
                                      unlist(strsplit(feat.c, "_"))[2]),
                             levels = c("M", "F"))
  
  
  ppcv_ggplot.ls[[set.c]] <- ggplot2::ggplot(ppcv.df, ggplot2::aes(x = gene, y = CV, col = gene, fill = sex)) +
    ggplot2::geom_boxplot(lwd = 1, outlier.size = 1) +
    ggplot2::scale_color_manual(values = RColorBrewer::brewer.pal(9, "Set1")[-1]) +
    ggplot2::scale_fill_manual(values = c(M = "lightblue2",
                                          F = "lightpink1")) +
    ggplot2::labs(title = set.c, x = "", y = "CV (%)") +
    ggplot2::theme_light() +
    ggplot2::theme(legend.text = ggplot2::element_text(size = 11, face = "bold"),
                   legend.title = ggplot2::element_blank(),
                   axis.title = ggplot2::element_text(size = 11, face = "bold"),
                   axis.text = ggplot2::element_text(size = 11, face = "bold"),
                   plot.title = ggplot2::element_text(size = 15, face = "bold"))
  
  # print(ppcv_ggplot.ls[[set.c]])
  
}

ppcv_ggplot.vc <- names(ppcv_ggplot.ls)
ppcv_ggplot.ls[["void"]] <- ggplot2::ggplot() + ggplot2::theme_void()
ppcv_ggplot.ls <- ppcv_ggplot.ls[c(ppcv_ggplot.vc[1],
                     "void",
                     ppcv_ggplot.vc[-1])]
ppcv_ggplot.gg <- gridExtra::grid.arrange(grobs = ppcv_ggplot.ls,
                                          nrow = ceiling(length(ProMetIS::sets.vc()) / 2), ncol = 2)

# plot(cvplot.gg)
ggplot2::ggsave("figures/article_data/ImbertEtAl_FigS11_cv_sex.pdf", ppcv_ggplot.gg,
                width = 14, height = 27)

3.4.3 on imputations

imputed_mi.ls <- sapply(ProMetIS::proteo_sets.vc(),
                        function(proteo_set.c) {
                         ProMetIS:::.imputation_info(eset = pp.mset[[proteo_set.c]],
                                                     set.c = proteo_set.c) 
                        })

ppcvimp_ggplot.ls <- list()

for (set.c in grep("proteomics", names(pp.mset), value = TRUE)) {
  
  pp.eset <- pp.mset[[set.c]]
  
  ppexprs.mn <- Biobase::exprs(pp.eset)
  
  pp_genesex.vc <- paste0(Biobase::pData(pp.eset)[, "gene"], "_",
                          Biobase::pData(pp.eset)[, "sex"])
  
  imputed.ml <- imputed.mi <- imputed_mi.ls[[set.c]]
  mode(imputed.ml) <- "logical"
  
  for (imputed.l in c(TRUE, FALSE)) {
    
    ppexprsimp.mn <- ppexprs.mn
    
    if (!imputed.l) {
      ppexprsimp.mn[imputed.ml] <- NA_real_
    }
    
    cat("\n\n", set.c, ", ", imputed.l, ":", sep = "")
    cat("\nsum of values: ", sum(ppexprsimp.mn, na.rm = TRUE), sep = "")
    print(summary(c(ppexprsimp.mn)))
    
    if (imputed.l) {
    cat("\nnumber of imputated values: ", sum(imputed.mi),
        " (", round(sum(imputed.mi) / cumprod(dim(imputed.mi))[2] * 100), "%)", sep = "")
      print(summary(c(ppexprsimp.mn[imputed.ml])))
    }
    
    ppcvimp.mn <- t(apply(ppexprsimp.mn, 1,
                          function(feat.vn) {
                            tapply(feat.vn, pp_genesex.vc, function(x) sd(x, na.rm = TRUE) / mean(x, na.rm = TRUE) * 100)
                          }))
    
    ppcvimp.df <- as.data.frame(tidyr::pivot_longer(tidyr::as_tibble(ppcvimp.mn),
                                                    cols = 1:ncol(ppcvimp.mn),
                                                    names_to = "features",
                                                    values_to = "CV"))
    ppcvimp.df[, "gene"] <- factor(sapply(ppcvimp.df[, "features"],
                                          function(feat.c)
                                            unlist(strsplit(feat.c, "_"))[1]),
                                   levels = c("WT", "LAT", "MX2"))
    ppcvimp.df[, "sex"] <- factor(sapply(ppcvimp.df[, "features"],
                                         function(feat.c)
                                           unlist(strsplit(feat.c, "_"))[2]),
                                  levels = c("M", "F"))
    
    
    p <- ggplot2::ggplot(ppcvimp.df, ggplot2::aes(x = gene, y = CV, col = gene, fill = sex)) +
      ggplot2::geom_boxplot(lwd = 1, outlier.size = 1) +
      ggplot2::scale_color_manual(values = RColorBrewer::brewer.pal(9, "Set1")[-1]) +
      ggplot2::scale_fill_manual(values = c(M = "lightblue2",
                                            F = "lightpink1")) +
      ggplot2::labs(title = paste0(set.c, " [", ifelse(imputed.l, "with", "without"), " imputation]"),
                    x = "", y = "CV (%)") +
      ggplot2::theme_light() +
      ggplot2::theme(legend.text = ggplot2::element_text(size = 11, face = "bold"),
                     legend.title = ggplot2::element_blank(),
                     axis.title = ggplot2::element_text(size = 11, face = "bold"),
                     axis.text = ggplot2::element_text(size = 11, face = "bold"),
                     plot.title = ggplot2::element_text(size = 15, face = "bold"))
    
    # print(p)
    
    ppcvimp_ggplot.ls[[paste0(set.c, "_", ifelse(imputed.l, "with", "without"), "_imputation]")]] <- p
  }
  
}
## 
## 
## proteomics_liver, TRUE:
## sum of values: 2126408   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   14.38   20.79   23.11   23.15   25.43   33.49 
## 
## number of imputated values: 2598 (3%)   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   14.62   18.32   19.30   19.11   19.84   24.01 
## 
## 
## proteomics_liver, FALSE:
## sum of values: 2076749   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   14.38   21.04   23.22   23.27   25.51   33.49    2598 
## 
## 
## proteomics_plasma, TRUE:
## sum of values: 360250   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   15.03   19.50   21.80   22.44   24.91   35.37 
## 
## number of imputated values: 797 (5%)   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   15.03   17.67   18.21   18.34   18.85   27.63 
## 
## 
## proteomics_plasma, FALSE:
## sum of values: 345629.1   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   15.35   19.81   22.04   22.65   25.12   35.37     797
ppcvimp_ggplot.gg <- gridExtra::grid.arrange(grobs = ppcvimp_ggplot.ls,
                                          nrow = 2, ncol = 2)

# plot(cvplot.gg)
ggplot2::ggsave("figures/article_data/ImbertEtAl_FigS12_cv_imp.pdf", ppcvimp_ggplot.gg,
                width = 14, height = 14)

4 Supplementary tables

for (set.c in names(pp.mset)) {
  # set.c <- names(pp.mset)[8]
  pp.eset <- pp.mset[[set.c]]
  
  if (grepl("metabolomics", set.c))
    pp.eset <- pp.eset[Biobase::fData(pp.eset)[, "name"] != "", ]
  
  if (which(names(pp.mset) == set.c) < 3) {
    xlsx::write.xlsx(cbind.data.frame(Biobase::fData(pp.eset),
                                      Biobase::exprs(pp.eset)),
                     file = "figures/article_data/ImbertEtAl_2021_supp_table_part1.xlsx",
                     sheet = gsub("proteomics", "proteo",
                                  gsub("metabolomics", "metabo", set.c)),
                     append = which(names(pp.mset) == set.c) > 1)
  } else if (which(names(pp.mset) == set.c) < 5) {
    xlsx::write.xlsx(cbind.data.frame(Biobase::fData(pp.eset),
                                      Biobase::exprs(pp.eset)),
                     file = "figures/article_data/ImbertEtAl_2021_supp_table_part2.xlsx",
                     sheet = gsub("proteomics", "proteo",
                                  gsub("metabolomics", "metabo", set.c)),
                     append = which(names(pp.mset) == set.c) > 3)
  } else if (which(names(pp.mset) == set.c) < 7) {
    xlsx::write.xlsx(cbind.data.frame(Biobase::fData(pp.eset),
                                      Biobase::exprs(pp.eset)),
                     file = "figures/article_data/ImbertEtAl_2021_supp_table_part3.xlsx",
                     sheet = gsub("proteomics", "proteo",
                                  gsub("metabolomics", "metabo", set.c)),
                     append = which(names(pp.mset) == set.c) > 5)
  }  else if (which(names(pp.mset) == set.c) < 9) {
    xlsx::write.xlsx(cbind.data.frame(Biobase::fData(pp.eset),
                                      Biobase::exprs(pp.eset)),
                     file = "figures/article_data/ImbertEtAl_2021_supp_table_part4.xlsx",
                     sheet = gsub("proteomics", "proteo",
                                  gsub("metabolomics", "metabo", set.c)),
                     append = which(names(pp.mset) == set.c) > 7)
  }  else {
    xlsx::write.xlsx(cbind.data.frame(Biobase::fData(pp.eset),
                                      Biobase::exprs(pp.eset)),
                     file = "figures/article_data/ImbertEtAl_2021_supp_table_part5.xlsx",
                     sheet = gsub("proteomics", "proteo",
                                  gsub("metabolomics", "metabo", set.c)),
                     append = which(names(pp.mset) == set.c) > 9)
  }
}